# -*- coding: UTF-8 -*-
"""Export labeled delivery-order records from Hive to a local CSV."""
import os
import time

# JAVA_HOME must be in the environment before the JVM is launched by Spark;
# set it up-front (the original set it after findspark.init(), which works
# only because the JVM starts lazily at getOrCreate — this ordering is safer).
os.environ['JAVA_HOME'] = '/usr/java/jdk1.8.0_181-cloudera'

import findspark
findspark.init()  # locate SPARK_HOME and make pyspark importable

from pyspark import SparkConf, SparkContext
from pyspark.sql import SparkSession
from pyspark.sql.functions import udf
from pyspark.sql.types import StringType
import pyspark.sql.functions as F

# Local-mode session with Hive support; metastore reached over thrift
# on localhost:9083.
spark_session = SparkSession.builder.master("local[*]").appName("hive_test_1") \
    .config("hive.metastore.uris", "thrift://127.0.0.1:9083") \
    .enableHiveSupport().getOrCreate()
# Hive SQL: delivery orders labeled by outcome.
#   - First branch:  orders with an operation of type 401 -> literal label 1
#   - Second branch: orders with an operation of type 402 -> literal label 0
# NOTE(review): the literal label column (1/0) carries no alias, so Hive will
# auto-name it (e.g. _c3) in the result — confirm downstream consumers expect
# that. Also, UNION (not UNION ALL) de-duplicates rows across the two
# branches; verify de-duplication is intended.
jdsql= """

(SELECT
	id,
	jd_id,
	resume_id,1
	FROM
	rcn_prod.t_delivery_order 
	WHERE EXISTS ( SELECT 1 FROM rcn_prod.t_delivery_order_operation WHERE rcn_prod.t_delivery_order.id = rcn_prod.t_delivery_order_operation.order_id AND rcn_prod.t_delivery_order_operation.operation_type = 401 )
	)UNION
	(SELECT
	id,
	jd_id,
	resume_id,0
	FROM
	rcn_prod.t_delivery_order 
	WHERE EXISTS ( SELECT 1 FROM rcn_prod.t_delivery_order_operation WHERE rcn_prod.t_delivery_order.id = rcn_prod.t_delivery_order_operation.order_id AND rcn_prod.t_delivery_order_operation.operation_type = 402 )
	)
"""

s = time.time()  # wall-clock start, used only for a rough timing printout

# Run the labeled-orders query; count() forces execution so the printed
# elapsed time reflects the actual query cost.
cv = spark_session.sql(jdsql)
print('order', cv.count())
print(time.time() - s)

# NOTE(review): toPandas() collects the entire result on the driver — fine
# for modest result sets, but will exhaust driver memory on very large ones.
# index=False: without it pandas prepends its row index as an extra unnamed
# CSV column, which downstream readers rarely expect.
cv.toPandas().to_csv('all_train_order_hive.csv', index=False)